---
title: "Food safety in NYC"
output:
flexdashboard::flex_dashboard:
orientation: columns
vertical_layout: fill
source: embed
navbar:
- { title: "Home", href: index.html, align: left }
- { title: "About", href: about.html, align: left }
- { title: "Resume", href: resume.html, align: left }
- { title: "Resume as doc", href: "./Johnstone Tcheou Resume.pdf", align: left }
---
```{r setup, include=FALSE}
library(p8105.datasets)
library(tidyverse)
library(flexdashboard)
library(plotly)
```
```{r import data}
# Load the `rest_inspec` data set into the session (the setup chunk attaches
# p8105.datasets); every chart below starts from this object.
data("rest_inspec")
```
Column {data-width=450}
-----------------------------------------------------------------------
### Distribution of scores by borough
```{r distribution of scores by borough}
# Violin plot of inspection scores, one violin per borough.
# Rows with boro == "Missing" or without a score are dropped, and the boroughs
# are put in a fixed display order before plotting.
rest_inspec |>
  filter(boro != "Missing", !is.na(score)) |>
  mutate(
    boro = factor(boro),
    boro = fct_relevel(
      boro,
      c("STATEN ISLAND", "BRONX", "QUEENS", "MANHATTAN", "BROOKLYN")
    )
  ) |>
  plot_ly(
    x = ~boro,
    y = ~score,
    color = ~boro,
    type = "violin",
    colors = "viridis"
  ) |>
  layout(
    title = "NYC DOHMH restaurant sanitary inspection scores by borough",
    yaxis = list(title = "Restaurant sanitation score"),
    xaxis = list(title = "Borough"),
    # Dashed horizontal lines at scores 14 and 28, i.e. the boundaries between
    # the A / B / C bands labelled by the annotations below. The x range
    # -0.5..4.5 spans the five borough categories.
    shapes = list(
      list(
        type = "line", y0 = 14, y1 = 14, x0 = -0.5, x1 = 4.5,
        line = list(width = 1, dash = "dash")
      ),
      list(
        type = "line", y0 = 28, y1 = 28, x0 = -0.5, x1 = 4.5,
        line = list(width = 1, dash = "dash")
      )
    )
  ) |>
  # Band labels on the left edge. `FALSE` is spelled out because `F` is an
  # ordinary variable that can be reassigned.
  add_annotations(
    x = -0.5, y = 7, xref = "x", yref = "y",
    text = "A\n0-13", showarrow = FALSE, font = list(size = 14)
  ) |>
  add_annotations(
    x = -0.5, y = 21, xref = "x", yref = "y",
    text = "B\n14-27", showarrow = FALSE, font = list(size = 14)
  ) |>
  add_annotations(
    x = -0.5, y = 35, xref = "x", yref = "y",
    text = "C\n28+", showarrow = FALSE, font = list(size = 14)
  )
```
### Scores by cuisine of ZIP codes near me
```{r scores by cuisine of nearby ZIPs}
# Box plot of inspection scores per cuisine for ZIP codes 10010, 10016, 10001.
# Cuisines are ordered by score via fct_reorder(); three long or accented
# labels are shortened for the axis. Hover text shows business name, score and
# ZIP code.
rest_inspec |>
  select(cuisine_description, score, zipcode, dba) |>
  filter(
    !is.na(cuisine_description),
    !is.na(score),
    cuisine_description != "Not Listed/Not Applicable",
    zipcode %in% c(10010, 10016, 10001)
  ) |>
  mutate(
    cuisine_description = factor(cuisine_description),
    cuisine_description = fct_reorder(cuisine_description, score),
    cuisine_description = fct_recode(
      cuisine_description,
      "Latin" = "Latin (Cuban, Dominican, Puerto Rican, South & Central American)",
      "Bottled beverages" = "Bottled beverages, including water, sodas, juices, etc.",
      "Cafe/Coffee/Tea" = "Café/Coffee/Tea"
    ),
    description = str_c("Business: ", dba, "\n Score: ", score, "\nZIP code: ", zipcode)
  ) |>
  plot_ly(
    x = ~cuisine_description,
    y = ~score,
    color = ~cuisine_description,
    type = "box",
    text = ~description
  ) |>
  layout(
    title = "Scores of restaurants in ZIP codes near me",
    yaxis = list(title = "Restaurant sanitation scores"),
    xaxis = list(title = "Cuisines"),
    # Dashed lines at scores 14 and 28 (the A / B / C boundaries labelled
    # below). The horizontal extent uses paper coordinates (0..1 = full plot
    # width) so the lines always span the plot regardless of how many cuisines
    # remain after filtering; the previous hard-coded data range 0..60 did not
    # track the number of categories.
    shapes = list(
      list(
        type = "line", xref = "paper", x0 = 0, x1 = 1,
        yref = "y", y0 = 14, y1 = 14,
        line = list(width = 1, dash = "dash")
      ),
      list(
        type = "line", xref = "paper", x0 = 0, x1 = 1,
        yref = "y", y0 = 28, y1 = 28,
        line = list(width = 1, dash = "dash")
      )
    )
  ) |>
  # Band labels on the left edge; `FALSE` instead of the reassignable `F`.
  add_annotations(
    x = -0.5, y = 6, xref = "x", yref = "y",
    text = "A\n0-13", showarrow = FALSE
  ) |>
  add_annotations(
    x = -0.5, y = 21, xref = "x", yref = "y",
    text = "B\n14-27", showarrow = FALSE
  ) |>
  add_annotations(
    x = -0.5, y = 33, xref = "x", yref = "y",
    text = "C\n28+", showarrow = FALSE
  )
```
Column {data-width=450}
-----------------------------------------------------------------------
### Average score over time by borough, 10/2011 - 10/2017
```{r october 2011 to 2017 avg score by borough}
# Mean inspection score per inspection date for a single borough.
#
# borough: value compared against the `boro` column (e.g. "MANHATTAN").
# Returns a tibble with one row per `inspection_date` and `avg_score`, the
# mean of `score` with NAs removed. Rows with a missing inspection date or an
# inspection year of 1900 are excluded.
borough_avg_score <- function(borough) {
  rest_inspec |>
    select(inspection_date, score, boro) |>
    filter(
      !is.na(inspection_date),
      year(inspection_date) != 1900,
      # .env makes sure the function argument is used, never a column.
      boro == .env$borough
    ) |>
    # group_by() + summarize() returns rows sorted by date, which the line
    # traces below rely on.
    group_by(inspection_date) |>
    summarize(avg_score = mean(score, na.rm = TRUE))
}

mn_avg_score <- borough_avg_score("MANHATTAN")
bk_avg_score <- borough_avg_score("BROOKLYN")
bx_avg_score <- borough_avg_score("BRONX")
qn_avg_score <- borough_avg_score("QUEENS")
si_avg_score <- borough_avg_score("STATEN ISLAND")

# One line trace per borough. The Queens trace was previously labelled
# "Brooklyn", which produced two "Brooklyn" legend entries and no "Queens".
plot_ly(
  si_avg_score,
  x = ~inspection_date,
  y = ~avg_score,
  name = "Staten Island",
  type = "scatter",
  mode = "lines",
  connectgaps = TRUE
) |>
  add_trace(
    data = bk_avg_score, x = ~inspection_date, y = ~avg_score,
    name = "Brooklyn", type = "scatter", mode = "lines"
  ) |>
  add_trace(
    data = bx_avg_score, x = ~inspection_date, y = ~avg_score,
    name = "The Bronx", type = "scatter", mode = "lines"
  ) |>
  add_trace(
    data = qn_avg_score, x = ~inspection_date, y = ~avg_score,
    name = "Queens", type = "scatter", mode = "lines"
  ) |>
  add_trace(
    data = mn_avg_score, x = ~inspection_date, y = ~avg_score,
    name = "Manhattan", type = "scatter", mode = "lines"
  ) |>
  layout(
    title = "Average NYC DOHMH restaurant sanitary inspection scores, 10/2011-10/2017",
    yaxis = list(title = "Average restaurant sanitation score"),
    xaxis = list(title = "Inspection Date")
  )
```
### Restaurants with most critical reports
```{r restaurants with most critical reports}
# Rows flagged "Critical" (inspection year 1900 excluded), with the combined
# "DUNKIN' DONUTS, BASKIN ROBBINS" name folded into "DUNKIN' DONUTS". Shared
# by the ordering vector and the chart so both use the same filter.
critical_inspections <-
  rest_inspec |>
  filter(critical_flag == "Critical" & year(inspection_date) != 1900) |>
  mutate(
    dba = case_match(
      dba,
      "DUNKIN' DONUTS, BASKIN ROBBINS" ~ "DUNKIN' DONUTS",
      .default = dba
    )
  )

# Names of the 100 businesses with the most critical rows overall, most first.
# Used below to order the x axis.
critical_restaurants_levels <-
  critical_inspections |>
  count(dba, name = "critical_reports") |>
  arrange(desc(critical_reports)) |>
  head(n = 100) |>
  pull(dba)

# Stacked bars: critical rows per business and year, limited to the 100
# largest (business, year) combinations.
critical_inspections |>
  mutate(year = factor(year(inspection_date))) |>
  # count() returns an ungrouped tibble. The previous
  # group_by(dba, year) |> summarize() left the data grouped by `dba`, so the
  # factor()/fct_relevel() step below ran once per business and the intended
  # level order was not applied across the whole column.
  count(dba, year, name = "critical_reports") |>
  arrange(desc(critical_reports), year) |>
  head(n = 100) |>
  mutate(
    dba = factor(dba),
    # Only relevel by names actually present, so fct_relevel() does not warn
    # about unknown levels.
    dba = fct_relevel(dba, intersect(critical_restaurants_levels, levels(dba)))
  ) |>
  plot_ly(
    x = ~dba,
    y = ~critical_reports,
    type = "bar",
    color = ~year,
    colors = "viridis"
  ) |>
  layout(
    title = "NYC DOHMH critical violations by restaurant",
    yaxis = list(title = "Critical violations"),
    xaxis = list(title = "Restaurants")
  )
```
### Cuisines with the most DOHMH-forced closures
```{r cuisines with most DOHMH closures}
# Rows whose `action` text contains "Closed" or "closed", with
# `cuisine_description` converted from UTF-8 to ISO-8859-1 as in the original
# pipeline. Shared by the ordering vector and the chart.
closures <-
  rest_inspec |>
  mutate(
    cuisine_description = iconv(cuisine_description, from = "UTF-8", to = "ISO-8859-1")
  ) |>
  select(boro, critical_flag, action, dba, cuisine_description, inspection_date) |>
  filter(str_detect(action, "[Cc]losed"))

# Cuisines ordered by total number of matching rows, most first; used to
# order the x axis.
cuisine_levels <-
  closures |>
  count(cuisine_description, name = "closed") |>
  arrange(desc(closed)) |>
  pull(cuisine_description)

# Bars of matching rows per cuisine, coloured by borough.
closures |>
  # count() returns an ungrouped tibble. The previous
  # group_by(boro, cuisine_description) |> summarize() left the data grouped
  # by `boro`, so the factor releveling/recoding below ran once per borough
  # instead of once over the whole column.
  count(boro, cuisine_description, name = "closed") |>
  arrange(desc(closed)) |>
  mutate(
    cuisine_description = factor(cuisine_description),
    cuisine_description = fct_relevel(cuisine_description, cuisine_levels),
    cuisine_description = fct_recode(
      cuisine_description,
      "Latin" = "Latin (Cuban, Dominican, Puerto Rican, South & Central American)",
      "Bottled beverages" = "Bottled beverages, including water, sodas, juices, etc."
    )
  ) |>
  plot_ly(x = ~cuisine_description, y = ~closed, color = ~boro, type = "bar") |>
  layout(
    title = "Cuisines with the most restaurants closed by the DOHMH",
    yaxis = list(title = "Number of restaurants closed"),
    xaxis = list(title = "Cuisines")
  )
```